import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import anndata as ad
import scanpy as sc
import scvi
import torch
# from lightning.pytorch.loggers import CSVLogger
import session_info
import warnings
torch.set_float32_matmul_precision('high')
from pyprojroot.here import here
# Setting some parameters
warnings.filterwarnings("ignore")
adata = ad.read(here("output/integration/Myeloid cells/Myeloid_cells_scVI_integrated_10_latent.h5ad"))
[rank: 0] Global seed set to 0 /scratch/groups/singlecell/software/anaconda3/envs/csf/lib/python3.10/site-packages/flax/struct.py:132: FutureWarning: jax.tree_util.register_keypaths is deprecated, and will be removed in a future release. Please use `register_pytree_with_keys()` instead. jax.tree_util.register_keypaths(data_clz, keypaths) /scratch/groups/singlecell/software/anaconda3/envs/csf/lib/python3.10/site-packages/flax/struct.py:132: FutureWarning: jax.tree_util.register_keypaths is deprecated, and will be removed in a future release. Please use `register_pytree_with_keys()` instead. jax.tree_util.register_keypaths(data_clz, keypaths)
# clustering with several resolutions
sc.pp.neighbors(adata, use_rep="X_scVI")
resolutions_to_explore = [0.05, 0.1, 0.5, 1, 1.5, 2]
for res in resolutions_to_explore:
leiden_cluster = "leiden_res_" + str(res)
sc.tl.leiden(adata, resolution=res, key_added=leiden_cluster)
sc.tl.umap(adata)#, init = 'X_scVI') ???
sc.pl.embedding(adata, basis="X_umap", color=['project', 'patient'], show=True, size=10)
sc.pl.embedding(adata, basis="X_umap", color='annot', show=True, size=10)
for res in resolutions_to_explore:
leiden_cluster = "leiden_res_" + str(res)
sc.set_figure_params(figsize=(7, 5))
sc.pl.embedding(adata, basis="X_umap", color=leiden_cluster, show=True, size=10, legend_loc="on data")
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.tl.rank_genes_groups(adata, 'leiden_res_1', method='t-test')
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False, ncols=2)
# set folder to save results
folder_for_immunologist = here('output/integration/Myeloid cells/markers/')
# Compute markers
sc.tl.rank_genes_groups(adata,
groupby='leiden_res_1',
method='wilcoxon',
use_raw = False)
# Save markers
rank_genesDF = sc.get.rank_genes_groups_df(adata, group=None)
rank_genesDF.to_csv(here('{}/marker_genes_res1_scVI.csv'.format(folder_for_immunologist)))
rank_genes_excel_path = here('{}/marker_genes_res1_scVI.xlsx'.format(folder_for_immunologist))
unique_groups = rank_genesDF["group"].unique()
with pd.ExcelWriter(rank_genes_excel_path, engine="openpyxl") as writer:
for group in unique_groups:
group_rank_genesDF = rank_genesDF[rank_genesDF["group"] == group]
group_rank_genesDF = group_rank_genesDF[group_rank_genesDF["logfoldchanges"] > 0.2]
group_rank_genesDF.to_excel(writer, sheet_name=group, index=False)
adata.write(here("output/integration/Myeloid cells/Myeloid_cells_scVI_clustered.h5ad"), compression="gzip")
pd.DataFrame(adata.obsm["X_umap"], index=adata.obs_names).to_csv(here("output/integration/Myeloid cells/Myeloid_cells_scVI_UMAP.csv"))
pd.DataFrame(adata.obs[["leiden_res_" + str(num) for num in resolutions_to_explore]]).to_csv(here("output/integration/Myeloid cells/Myeloid_cells_scVI_clusters.csv"))